rm(list = ls())
library(countrycode)
library(pscl)
library(foreign)
setwd("~/Desktop/QJPS Replication")

data <- read.csv("data/roll_call_matrix.csv")
data <- data[order(data$cowcode),]

##Miller et all democracy data
dem <- read.dta13("data/ccpcnc_v2_small.dta")

#combine democracy data with roll call matrix
data$match <- paste(data$cowcode, data$year)
dem$match <- paste(dem$cowcode, dem$year)
m <- match(data$match, dem$match)
table(is.na(m))

dem$country <- as.character(dem$country)
data$country_name <- dem$country[m]
data$country_name <- as.character(data$country_name)

#get rid of Mexico
data <- data[data$country_name != "Mexico",]

#subset data to only those that had a change occur
#some of the original data are missing because the CCP 
#authors haven't coded it yet. For example, the Bahamas only enter the datset in 2002, 
#but became a country in 1973.
data2 <- data[data$evnt == 1,]
data2 <- data2[,11:(ncol(data2)-3)]

#How many missing values are there for each entry
num.missing <- apply(data2, 1, function(x)sum(ifelse(is.na(x), 1, 0)))
prop.missing <- num.missing / ncol(data2)

#Get rid of those entries with more than 90 percent missing values
data3 <- data2[prop.missing < .9,]

#make the matrix a roll call object
mat <- rollcall(data3, yea = 1, nay = 0, legis.names = rownames(data3), vote.names = colnames(data3))

###################
#Determine Dimensionality

#Eigenvalues of the correlation matrix
r <- cor(t(data3) ,use="pairwise")
dim(r)
table(is.na(r))

e <- eigen(r)
lambda <- e$values
n <- dim(e)[1]


######################
#Figure 1 - left panel
######################
plot(1:20, lambda[1:20], ylab = "Eigenvalues", xlab = "Latent Dimension", type = "l", axes = F, 
     main = "Eigenvalues of the\nCorrelation Matrix")
points(1:20,lambda[1:20], pch = 16)
axis(2, at = seq(0, 800, 100), las = 2)
axis(1, at = seq(1, 20, 1), cex.axis = .8)






###################
###################
#scale the roll call matrix
#These take several (approx 8) hours to complete.

#1 dimensional model
set.seed(4227492)
idealpoints <- ideal(mat, maxiter = 10000, burnin = 4000, thin = 1, verbose = TRUE, d = 1, store.item = T, normalize = T)

pred.idealpoints <- predict(idealpoints, cutoff=.5)
correct <- pred.idealpoints$overall.percent

#run 2-10d models in ideal to get %correct classified
set.seed(4227492)
idealpoints2 <- ideal(mat, maxiter = 10000, burnin = 4000, thin = 1, verbose = TRUE, d = 2, store.item = T, normalize = T)
y
y
pred.idealpoints2 <- predict(idealpoints2, cutoff=.5)
correct2 <- pred.idealpoints2$overall.percent

set.seed(4227492)
idealpoints3 <- ideal(mat, maxiter = 10000, burnin = 4000, thin = 1, verbose = TRUE, d = 3, store.item = T, normalize = T)
y
y
pred.idealpoints3 <- predict(idealpoints3, cutoff=.5)
correct3 <- pred.idealpoints3$overall.percent

set.seed(4227492)
idealpoints4 <- ideal(mat, maxiter = 10000, burnin = 4000, thin = 1, verbose = TRUE, d = 4, store.item = T, normalize = T)
y
y
pred.idealpoints4 <- predict(idealpoints4, cutoff=.5)
correct4 <- pred.idealpoints4$overall.percent

set.seed(4227492)
idealpoints5 <- ideal(mat, maxiter = 10000, burnin = 4000, thin = 1, verbose = TRUE, d = 5, store.item = T, normalize = T)
y
y
pred.idealpoints5 <- predict(idealpoints5, cutoff=.5)
correct5 <- pred.idealpoints5$overall.percent

set.seed(4227492)
idealpoints6 <- ideal(mat, maxiter = 10000, burnin = 4000, thin = 1, verbose = TRUE, d = 6, store.item = T, normalize = T)
y
y
pred.idealpoints6 <- predict(idealpoints6, cutoff=.5)
correct6 <- pred.idealpoints6$overall.percent

set.seed(4227492)
idealpoints7 <- ideal(mat, maxiter = 10000, burnin = 4000, thin = 1, verbose = TRUE, d = 7, store.item = T, normalize = T)
y
y
pred.idealpoints7 <- predict(idealpoints7, cutoff=.5)
correct7 <- pred.idealpoints7$overall.percent

set.seed(4227492)
idealpoints8 <- ideal(mat, maxiter = 10000, burnin = 4000, thin = 1, verbose = TRUE, d = 8, store.item = T, normalize = T)
y
y
pred.idealpoints8 <- predict(idealpoints8, cutoff=.5)
correct8 <- pred.idealpoints8$overall.percent

set.seed(4227492)
idealpoints9 <- ideal(mat, maxiter = 10000, burnin = 4000, thin = 1, verbose = TRUE, d = 9, store.item = T, normalize = T)
y
y
pred.idealpoints9 <- predict(idealpoints9, cutoff=.5)
correct9 <- pred.idealpoints9$overall.percent

set.seed(4227492)
idealpoints10 <- ideal(mat, maxiter = 10000, burnin = 4000, thin = 1, verbose = TRUE, d = 10, store.item = T, normalize = T)
y
y
pred.idealpoints10 <- predict(idealpoints10, cutoff=.5)
correct10 <- pred.idealpoints10$overall.percent

cor.class10 <- c(correct, correct2, correct3, correct4, correct5, 
                 correct6, correct7, correct8, correct9, correct10)
x <- 1:10


######################
#Figure 1 - right panel
######################
plot(x, cor.class10, ylim = c(0,100), pch = 16, 
     xlab = "Dimensions", ylab = "Percent", 
     type = "l", main = "Correct Classification of Votes", axes = F)
points(x,cor.class10, pch = 16)
axis(1, at = 1:10)
axis(2, at = seq(0, 100, 20), las = 2)
